# This file replicates all models presented in:
# Who joins and who fights? Explaining tacit coalition behavior among civil war actors
# Published in International Studies Quarterly
# Authors:
# 	Martin C. Steinwand; Department of Government; University of Essex
# 		Nils W. Metternich; Department of Political Science; University College London

# For question with the replication code please contact: 
# m.steinwand@essex.ac.uk or n.metternich@ucl.ac.uk

# Replicated in: R version 3.6.2 
# OS: macOS 12.3.1
# Date: 5 August 2022


#------------------------
# Please ensure the following packages are installed
# Loading packages
rm(list=ls())
library(fields)
library(dplyr)
library(gtools)
require(MASS)
require(ggplot2)
library(gridExtra)
library(texreg)
library(verification)
library(pscl)
# The out-commented code below installs two packages that are needed
#install.packages("R2admb")
#install.packages("glmmADMB", 
 #  repos=c("http://glmmadmb.r-forge.r-project.org/repos",
  #          getOption("repos")),
   # type="source")
library(glmmADMB)


#------------------------
#The next step requires the creation of two separate folders:
# inputData and graphics

# Please update YOURPATH to pathIN; pathGraph according to your setup

mystem <- Sys.info()["user"] #system name

#------------------------
#Set paths
if(
Sys.info()["user"]==mystem){pathIN="YOURPATH/inputData";pathGraph="YOURPATH/graphics"}


#------------------------
# Running complete analysis for different coalition definitions as explained in manuscript
ending <- c("","_cont","_cont_tri") #this used to estimate different coalition definitions
data.ending <- c("",".cont",".cont.tri") # this is used to call data with different coalition definitions

# The analysis is conducted for all coalition definitions and Tables and Figures are replicated below
for(iii in 1:3){
setwd(pathIN)
	load(paste("final.data.for.analysis",data.ending[iii],".rda",sep=""))
		final.data$count <- as.numeric(final.data$count)

#create time lags
lag.1 <- final.data[,c("year","coalID","count","ccode")]
	names(lag.1)[3] <- "count.l1"
		lag.1$year <- lag.1$year+1
			final.data <- merge(final.data,lag.1,by.x=c("year","coalID","ccode"),by.y=c("year","coalID","ccode"),all.x=TRUE)
				final.data$count.l1[is.na(final.data$count.l1)]  <- 0 

#calculate the country wide gini (equal to grand coalition gini)
country.gini <- final.data[final.data$coalition.size==final.data$count.possible.coalitions,]
	want <- which(colnames(country.gini) %in% c("year","ccode","gini.coalition"))
		country.gini <- country.gini[,want]
			colnames(country.gini) <- c("year","ccode","country.gini")
				final.data <- merge(final.data,country.gini,by.x=c("year","ccode"),by.y=c("year","ccode"),all.x=TRUE)

# adjust for table display
final.data$W.centdist.fight.mean <- final.data$W.centdist.fight.mean/1000

#------------------------
# Estimation of models

#Define model.frame for estimated models model
variables.needed <- as.formula(count~government.coal+fight.in.coalition+ratio.sum.area+coalition.size+count.possible.coalitions+W.centdist.fight.mean+gini.coalition+ethnic.linkages.norm+count.l1+ccode+coalID)
run.data <- model.frame(variables.needed ,data=final.data)



#Specification for estimated models
specification0 <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions)
specification1 <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1)
specification2 <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2))
specification3 <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2)+I(W.centdist.fight.mean^2))

specification2interact <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+I(W.centdist.fight.mean*ethnic.linkages.norm)+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2))


specification0.infl <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions|1)
specification1.infl <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1|1)
specification2.infl <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2)|1)
specification3.infl <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2)+I(W.centdist.fight.mean^2)|1)


specification2.infl.build.1 <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2)|1+government.coal)
specification2.infl.build.2 <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2)|1+government.coal+count.l1)
specification2.infl.build.3 <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2)|1+government.coal+count.l1+factor(ccode))


model.0 <- glm.nb(specification0,data=run.data[run.data$count.possible.coalitions>=3 & run.data$count.possible.coalitions!=run.data$coalition.size,]) 
model.0 <- glm.nb(specification0,data=run.data[run.data$count.possible.coalitions>=3,])
model.1 <- glm.nb(specification1,data=run.data[run.data$count.possible.coalitions>=3,])
model.2 <- glm.nb(specification2,data=run.data[run.data$count.possible.coalitions>=3,])
model.3 <- glm.nb(specification3,data=run.data[run.data$count.possible.coalitions>=3,])

if(iii==1){
model.2.interact <- glm.nb(specification2interact,data=run.data[run.data$count.possible.coalitions>=3,])
}

model.2.infl.build.1 <- zeroinfl(specification2.infl.build.1,dist = c("negbin"),data=run.data[run.data$count.possible.coalitions>=3,])
model.2.infl.build.2 <- zeroinfl(specification2.infl.build.2,dist = c("negbin"),data=run.data[run.data$count.possible.coalitions>=3,])
model.2.infl.build.3 <- zeroinfl(specification2.infl.build.3,dist = c("negbin"),data=run.data[run.data$count.possible.coalitions>=3,])


#------------------------
#Figure 6
if(iii==1){
n.data.1 <- data.frame(t(apply(run.data[run.data$count.possible.coalitions>=3,1:11],2,FUN=function(x) mean(x,na.rm=TRUE))))
n.data.1[,"government.coal"] <- 0
n.data.1[,"count.l1"] <- 2
gini <-  seq(0,1,0.01)

n.data.2 <- n.data.1
for (i in 1:(length(gini)-1)){
    n.data.1 <- rbind(n.data.1,n.data.2)
}

n.data.1 <- data.frame(n.data.1)
n.data.1$gini.coalition <- gini

n.data.1 <- cbind(n.data.1, predict(model.2, n.data.1, type = "response", se.fit=TRUE))
n.data.1 <- within(n.data.1, {
  predL <- fit
  LL <- fit - 1.96 * se.fit
  UL <- fit + 1.96 * se.fit
})

effect.plot <- ggplot(n.data.1, aes(gini.coalition, predL)) +
  geom_ribbon(aes(ymin = LL, ymax = UL), alpha = .25) +
  geom_line() +
  labs(x = "Coalition Inequality (Gini)", y = "Predicted Tacit Coalition Months") +
  theme(legend.position="none")
  
setwd(pathGraph)    
pdf(paste("plot_effectm2Gini",ending[iii],".pdf",sep=""),height=5,width=5)
effect.plot
dev.off()



plot_top <- ggplot(run.data[run.data$count.possible.coalitions>=3,], aes(gini.coalition, fill="orange")) + 
geom_histogram(aes(xmin = 0, xmax = 1),bin=1/20)+
  scale_fill_manual(values = c("darkgrey")) + 
  theme(legend.position = "none")+
  labs(x = "Coalition Inequality (Gini)", y = "Frequency") +
  geom_rug(col="black",alpha=.1)
  
  
setwd(pathGraph)    
pdf(paste("plot_effectm2GiniHist",ending[iii],".pdf",sep=""),height=5,width=5)
plot_top
dev.off()

  
}

#------------------------
#Figure 7 left panel

if(iii==1){

n.data.1 <- data.frame(t(apply(run.data[run.data$count.possible.coalitions>=3,1:11],2,FUN=function(x) mean(x,na.rm=TRUE))))
n.data.1[,"government.coal"] <- 0
n.data.1[,"count.l1"] <- 2
ratio <-  seq(0,1,0.01)

n.data.2 <- n.data.1
for (i in 1:(length(ratio)-1)){
    n.data.1 <- rbind(n.data.1,n.data.2)
}

n.data.1 <- data.frame(n.data.1)
n.data.1$ratio.sum.area <- ratio


fight.distance <- seq(min(run.data[run.data$count.possible.coalitions>=3,]$W.centdist.fight.mean),max(run.data[run.data$count.possible.coalitions>=3,]$W.centdist.fight.mean),by=0.5)
fight.distance <- rep(fight.distance,each=dim(n.data.1)[1])
n.data.1 <- cbind(fight.distance,n.data.1)
n.data.1$W.centdist.fight.mean <- n.data.1$fight.distance

n.data.1 <- cbind(n.data.1, predict(model.2, n.data.1, type = "response", se.fit=TRUE))
n.data.1 <- within(n.data.1, {
  predL <- fit
  LL <- fit - 1.96 * se.fit
  UL <- fit + 1.96 * se.fit
})


effect.plot.2 <- ggplot(n.data.1, aes(x=ratio.sum.area, y=predL,group=W.centdist.fight.mean,fill=factor(W.centdist.fight.mean))) +
  geom_ribbon(aes(ymin = LL, ymax = UL), alpha = .25) +
  geom_line(aes(color=factor(W.centdist.fight.mean))) +
  labs(x = "Proportion of Area Active ", y = "Predicted Tacit Coalition Months") +
  scale_fill_grey() +
  scale_color_grey() +
  labs(fill = "Average Distance in Coalition ",color = "Average Distance in Coalition ")+
  theme(legend.position="bottom")
  
 
setwd(pathGraph)    
pdf(paste("plot_effectm2Distance",ending[iii],".pdf",sep=""),height=5,width=5)
effect.plot.2
dev.off()

}

#------------------------
#Figure 7 right panel

if(iii==1){
n.data.1 <- data.frame(t(apply(run.data[run.data$count.possible.coalitions>=3,1:11],2,FUN=function(x) mean(x,na.rm=TRUE))))
n.data.1[,"government.coal"] <- 0
n.data.1[,"count.l1"] <- 2
ratio <-  seq(0,1,0.01)

n.data.2 <- n.data.1
for (i in 1:(length(ratio)-1)){
    n.data.1 <- rbind(n.data.1,n.data.2)
}

n.data.1 <- data.frame(n.data.1)
n.data.1$ratio.sum.area <- ratio


ethnic.distance <- c(min(run.data[run.data$count.possible.coalitions>=3,]$ethnic.linkages.norm),10,max(run.data[run.data$count.possible.coalitions>=3,]$ethnic.linkages.norm))
ethnic.distance <- rep(ethnic.distance,each=dim(n.data.1)[1])
n.data.1 <- cbind(ethnic.distance,n.data.1)
n.data.1$ethnic.linkages.norm <- n.data.1$ethnic.distance

n.data.1 <- cbind(n.data.1, predict(model.2, n.data.1, type = "response", se.fit=TRUE))
n.data.1 <- within(n.data.1, {
  predL <- fit
  LL <- fit - 1.96 * se.fit
  UL <- fit + 1.96 * se.fit
})



effect.plot.3 <- ggplot(n.data.1, aes(x=ratio.sum.area, y=predL,group=ethnic.linkages.norm,fill=factor(ethnic.linkages.norm))) +
  geom_ribbon(aes(ymin = LL, ymax = UL), alpha = .25) +
  geom_line(aes(color=factor(ethnic.linkages.norm))) +
  labs(x = "Proportion of Area Active ", y = "Predicted Tacit Coalition Months") +
  scale_fill_grey() +
  scale_color_grey() +
  labs(fill = "Ethnic Linkage Density",color = "Ethnic Linkage Density")+
  theme(legend.position="bottom")

setwd(pathGraph)    
pdf(paste("plot_effectm2Ethnic",ending[iii],".pdf",sep=""),height=5,width=5)
effect.plot.3
dev.off()

}

#------------------------
#Figure A4 left panel

p.model.1 <- predict(model.2,type="response")
	y.model.1 <- model.2$y
		dev.model.1 <- y.model.1-p.model.1
			predict.eval <- data.frame(y.model.1,p.model.1,dev.model.1)
				colnames(predict.eval) <- c("y","p","d")

p1 <- ggplot(predict.eval,aes(x=as.factor(y),y=d))+
		geom_boxplot()+geom_point()+ylim(c(-40,40))+ xlab("Observed Realized Tacit Coalitions Months")+ ylab(paste("Prediction Error, ","RMSE=",print(round(sqrt(mean((model.1$y-p.model.1)^2)),digits=3)),sep="")) 
		
setwd(pathGraph)    
pdf(paste("plot_InM2",ending[iii],".pdf",sep=""),height=5/1.63,width=5)
p1 
dev.off()
		


#------------------------
#Random Effects Model

run.data$ccode.factor <- as.factor(as.character(run.data$ccode))
	specification2.r <- as.formula(count~gini.coalition+ratio.sum.area+W.centdist.fight.mean+ethnic.linkages.norm+coalition.size+fight.in.coalition+government.coal+count.possible.coalitions+count.l1+I(ratio.sum.area^2)+(1|ccode.factor))

if(iii!=3){
model.2.r <- glmmadmb(specification2.r,data=run.data[run.data$count.possible.coalitions>=3,],family="nbinom", 
zeroInflation=FALSE, admb.opts=admbControl(noinit=TRUE,shess=TRUE)) 
}

if(iii==3){
model.2.r <- glmmadmb(specification2.r,data=run.data[run.data$count.possible.coalitions>=3,],family="nbinom", 
zeroInflation=TRUE, admb.opts=admbControl(noinit=TRUE,shess=TRUE)) 
}

#------------------------
#Generate tables for all models


#------------------------
#Table 2 ; Table A3 ; Table A4
setwd(pathGraph)

if(iii==1){
	
coef.names <- c("Intercept","Coalition Gini","Proportion of Area Active","Average Distance in Coalition","Ethnic Linkage","Coalition Size","Infighting","Government Coalition","Possible Coalitions","Dependent Lag","Sq. Proportion of Area Active","Sq. Average Distance in Coalition")

texreg(list(model.0,model.1, model.2,model.2.r, model.3),custom.coef.names = coef.names,label=paste("tab:coefNB",ending[iii],sep=""),file=paste("tab_coef_nb",ending[iii],".tex",sep=""),stars = c(0.001, 0.01, 0.05, 0.1),symbol = "\\circ",custom.model.names = c("Base Model", "DV-lag Model","Main Model","RE", "Sq Model"),scalebox = 0.8,caption="Estimates for the k-adic negative binomial models. Unit of analysis is a potential coalition-year. Outcome variables are the months a potential coalition is realized as a tacit coalition in a given year. Models include country-years with at least three actors (including the government).",use.packages = FALSE)}

if(iii==2){
	
coef.names <- c("Intercept","Coalition Gini","Proportion of Area Active","Average Distance in Coalition","Ethnic Linkage","Coalition Size","Infighting","Government Coalition","Possible Coalitions","Dependent Lag","Sq. Proportion of Area Active")

texreg(list(model.2,model.2.r),custom.coef.names = coef.names,label=paste("tab:coefNB",ending[iii],sep=""),file=paste("tab_coef_nb",ending[iii],".tex",sep=""),stars = c(0.001, 0.01, 0.05, 0.1),symbol = "\\circ",custom.model.names = c("Main Model","RE"),scalebox = 0.8,caption="Estimates for the k-adic negative binomial models. Unit of analysis is a potential coalition-year. Outcome variables are the months a potential coalition is realized as a tacit coalition in a given year. Models include country-years with at least three actors (including the government).",use.packages = FALSE)}

if(iii==3){
coef.names <- c("Intercept","Coalition Gini","Proportion of Area Active","Average Distance in Coalition","Ethnic Linkage","Coalition Size","Infighting","Government Coalition","Possible Coalitions","Dependent Lag","Sq. Proportion of Area Active")

texreg(list(model.2,model.2.r),custom.coef.names = coef.names,label=paste("tab:coefNB",ending[iii],sep=""),file=paste("tab_coef_nb",ending[iii],".tex",sep=""),stars = c(0.001, 0.01, 0.05, 0.1),symbol = "\\circ",custom.model.names = c("Main Model","RE"),scalebox = 0.8,caption="Estimates for the k-adic negative binomial models. Unit of analysis is a potential coalition-year. Outcome variables are the months a potential coalition is realized as a tacit coalition in a given year. Models include country-years with at least three actors (including the government).",use.packages = FALSE)}



#------------------------
#Table A2

if(iii==1){
coef.names.inf.built <- c("Intercept","Coalition Gini","Proportion of Area Active","Average Distance in Coalition","Ethnic Linkage","Coalition Size","Infighting","Government Coalition","Possible Coalitions","Dependent Lag","Sq. Proportion of Area Active","Theta","Intercept","Government Coalition","Dependent Lag",rep("Country",38))

texreg(list(model.2.infl.build.2, model.2.infl.build.3),custom.coef.names = coef.names.inf.built,label=paste("tab:coefNBinfbuild",ending[iii],sep=""),file=paste("tab_coef_nb_inf_build",ending[iii],".tex",sep=""),stars = c(0.001, 0.01, 0.05, 0.1),symbol = "\\circ",custom.model.names = c("ZINF with coalition features", "ZINF with country features"),scalebox = 0.8,caption="Estimates for the k-adic zero inflated negative binomial models. Unit of analysis is a potential coalition-year. Outcome variables are the months a potential coalition is realized as a tacit coalition in a given year. Models include country-years with at least three actors (including the government).",use.packages = FALSE)
}

#------------------------
#Table A6

if(iii==1){
#generate table for all models
coef.names.interact <- c("Intercept","Coalition Gini","Proportion of Area Active","Average Distance in Coalition","Ethnic Linkage","Average Distance in Coalition $\\times$ Ethnic Linkage","Coalition Size","Infighting","Government Coalition","Possible Coalitions","Dependent Lag","Sq. Proportion of Area Active")

setwd(pathGraph)
texreg(list(model.2.interact),custom.coef.names = coef.names.interact,label=paste("tab:coefInteract",ending[iii],sep=""),file=paste("tab_coef_interact",ending[iii],".tex",sep=""),stars = c(0.001, 0.01, 0.05, 0.1),symbol = "\\circ",custom.model.names = c("Interaction Model"),scalebox = 0.8,caption="Estimates for the k-adic negative binomial model. Unit of analysis is a potential coalition-year. Outcome variables are the months a potential coalition is realized as a tacit coalition in a given year. Models include country-years with at least three actors (including the government).",use.packages = FALSE)
}

#------------------------
#Figure A4 right panel
if(iii==1){
p.model.1 <- predict(model.2.r,type="response")
	y.model.1 <- run.data[run.data$count.possible.coalitions>=3,]$count
		dev.model.1 <- y.model.1-p.model.1
			predict.eval <- data.frame(y.model.1,p.model.1,dev.model.1)
				colnames(predict.eval) <- c("y","p","d")

p2 <- ggplot(predict.eval,aes(x=as.factor(y),y=d))+
		geom_boxplot()+geom_point()+ylim(c(-40,40))+ xlab("Observed Realized Tacit Coalitions Months")+ ylab(paste("Prediction Error, ","RMSE=",print(round(sqrt(mean((model.1$y-p.model.1)^2)),digits=3)),sep="")) 
		
setwd(pathGraph)    
pdf(paste("plot_InMR",ending[iii],".pdf",sep=""),height=5/1.63,width=5)
p2 
dev.off()
}

#------------------------
#Table A5

if(iii==1){
model.2.3 <- glm.nb(specification2,data=run.data[run.data$count.possible.coalitions>=3,])
model.2.4 <- glm.nb(specification2,data=run.data[run.data$count.possible.coalitions>=4,])
model.2.5 <- glm.nb(specification2,data=run.data[run.data$count.possible.coalitions>=5,])
model.2.6 <- glm.nb(specification2,data=run.data[run.data$count.possible.coalitions>=6,])

#generate table for all models
coef.names <- c("Intercept","Coalition Gini","Proportion of Area Active","Average Distance in Coalition","Ethnic Linkage","Coalition Size","Infighting","Government Coalition","Possible Coalitions","Dependent Lag","Sq. Proportion of Area Active")

setwd(pathGraph)
texreg(list(model.2.3,model.2.4, model.2.5,model.2.6),custom.coef.names = coef.names,label=paste("tab:coefNBrobust",ending[iii],sep=""),file=paste("tab_coef_nb_potential",ending[iii],".tex",sep=""),stars = c(0.001, 0.01, 0.05, 0.1),symbol = "\\circ",custom.model.names = c("Base Model", "GEqual 4","GEqual 5","GEqual 6"),scalebox = 0.8,caption="Estimates for the k-adic negative binomial models. Unit of analysis is a potential coalition-year. Outcome variables are the months a potential coalition is realized as a tacit coalition in a given year. Models include country-years with at least three, four, five, and six actors (including the government).",use.packages = FALSE)

}

}





